In [ ]:
from twython import TwythonStreamer
import string, json, pprint
import urllib
from datetime import datetime
from datetime import date
from time import *
import string, os, sys, subprocess, time
import psycopg2
import re
from osgeo import ogr
In [ ]:
# Credentials used to get access to the Twitter API.
# NOTE(review): these values are committed in plain text. Each one can be
# supplied through the environment variable named below instead; the literals
# only remain as fallbacks so existing setups keep working, and should be
# rotated and removed from the source.
APP_KEY = os.environ.get('TWITTER_APP_KEY', 'fQCYxyQmFDUE6aty0JEhDoZj7')
APP_SECRET = os.environ.get('TWITTER_APP_SECRET', 'ZwVIgnWMpuEEVd1Tlg6TWMuyRwd3k90W3oWyLR2Ek1tnjnRvEG')
OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '824520596293820419-f4uGwMV6O7PSWUvbPQYGpsz5fMSVMct')
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '1wq51Im5HQDoSM0Fb5OzAttoP3otToJtRFeltg68B8krh')
We are going to use a PostGIS database, which requires you to have an empty database. Enter these steps into the terminal to set up your database. In this example we use "demo" as the name of our database. Feel free to give your database another name, but replace "demo" with the name you have chosen.
psql -d postgres
postgres=# CREATE DATABASE demo;
postgres=# \c demo
demo=# create extension postgis;
demo=# CREATE TABLE tweets (id serial primary key, tweet_id BIGINT, text varchar(140), date DATE, time TIME, geom geometry(POINT,4326) );
In [ ]:
# Database settings read by insert_into_DB.
# dbname, user and password are handed to psycopg2.connect().
dbname = "demo"
user = "user"
password = "user"
# Name of the table the INSERT statement writes to.
table = "tweets"
In [ ]:
def insert_into_DB(tweet_id, tweet_text, tweet_date, tweet_time, tweet_lat, tweet_lon):
    """Insert one tweet as a new row of the configured table.

    Opens a fresh connection with the module-level ``dbname``, ``user`` and
    ``password`` settings, runs a single INSERT into ``table`` and commits.
    Database errors are printed rather than raised, and the connection is
    closed whether or not the insert succeeded.

    Args:
        tweet_id: tweet id, written to the ``tweet_id`` column.
        tweet_text: tweet text. It is spliced into the statement between
            single quotes, so the caller must already have doubled every
            single quote it contains.
        tweet_date: date string for the ``date`` column.
        tweet_time: time string for the ``time`` column.
        tweet_lat: latitude of the tweet position.
        tweet_lon: longitude of the tweet position.
    """
    # The point is written as "POINT(lon lat)".
    point_wkt = "POINT(" + str(tweet_lon) + " " + str(tweet_lat) + ")"
    # NOTE(review): the statement is assembled by string concatenation and
    # relies on the caller having escaped tweet_text. Switching to bound
    # parameters has to be done together with removing that escaping in the
    # caller, otherwise quotes would be stored doubled.
    sql = ("INSERT INTO " + str(table) + " (tweet_id, text, date, time, geom) "
           + "VALUES (" + str(tweet_id) + ", '" + str(tweet_text) + "', '"
           + str(tweet_date) + "', '" + str(tweet_time) + "', "
           + "ST_GeomFromText('" + point_wkt + "', 4326))")
    conn = None
    try:
        conn = psycopg2.connect(dbname=dbname, user=user, password=password)
        cur = conn.cursor()
        cur.execute(sql)
        conn.commit()
    except psycopg2.DatabaseError as e:
        print('Error %s' % e)
    finally:
        # Close on every path; previously a failed execute/commit left the
        # connection open.
        if conn is not None:
            conn.close()
In [ ]:
def remove_link(text):
    """Return ``text`` cut off at the first ``https://``.

    Everything from the first occurrence onwards is dropped, including any
    words that follow the link. Text without ``https://`` is returned
    unchanged.
    """
    found = re.search(r'(https://)', text)
    if found is None:
        return text
    return text[:found.start(1)]
In [ ]:
class MyStreamer(TwythonStreamer):
    """Process JSON data coming from the Twitter stream API.

    Extracts the relevant fields of each message and stores tweets that
    carry coordinates through ``insert_into_DB``.
    """

    def on_success(self, data):
        """Store the tweet in ``data`` when it has a usable position.

        Messages without coordinates, with a latitude of 0, or lacking the
        ``id``, ``text`` or ``created_at`` field are ignored.

        Args:
            data: one decoded message received from the stream.
        """
        geo = data['coordinates'] if 'coordinates' in data else None
        if geo is None:
            return
        latlon = geo['coordinates']
        tweet_lon = latlon[0]
        tweet_lat = latlon[1]
        # A latitude of 0 doubles as the "no position" marker and is skipped.
        if tweet_lat == 0:
            return
        # Without these fields there is nothing complete to insert; skipping
        # avoids referencing values that were never extracted.
        if 'id' not in data or 'text' not in data or 'created_at' not in data:
            return

        # Single quotes are doubled because insert_into_DB places the text
        # inside a quoted SQL literal; semicolons are dropped.
        tweet_text = data['text'].encode('utf-8').replace("'", "''").replace(';', '')
        tweet_text = remove_link(tweet_text)

        tweet_datetime = datetime.strptime(data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
        tweet_date = tweet_datetime.strftime('%Y-%m-%d')
        tweet_time = tweet_datetime.strftime('%H:%M:%S')

        # call function to write to DB
        insert_into_DB(data['id'], tweet_text, tweet_date, tweet_time, tweet_lat, tweet_lon)

    def on_error(self, status_code, data):
        """Print the status code reported for a stream error."""
        print("OOPS FOUTJE: " + str(status_code))
        # NOTE(review): the stream is not disconnected here; a
        # self.disconnect call was left commented out.
In [ ]:
def main():
    """Create the streamer and start filtering tweets by bounding box.

    A ValueError raised while creating the streamer or while reading the
    stream is reported on stdout. If the streamer cannot be created, the
    function returns without trying to open the stream.
    """
    try:
        stream = MyStreamer(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
    except ValueError as e:
        print('OOPS! that hurts, something went wrong while making connection with Twitter: ' + str(e))
        # No streamer object exists, so there is nothing to filter on.
        return
    print('Connecting to twitter: will take a minute')

    # Filter based on bounding box see twitter api documentation for more info
    # NOTE(review): presumably west, south, east, north in degrees - confirm.
    try:
        stream.statuses.filter(locations='-0.351468, 51.38494, 0.148271, 51.672343')
    except ValueError as e:
        print('OOPS! that hurts, something went wrong while getting the stream from Twitter: ' + str(e))


if __name__ == '__main__':
    main()